#!/usr/bin/perl -w

###############################################################################
use strict; # Always working in strict mode with warnings on
###############################################################################
use File::Basename;
use File::Copy;
use Time::HiRes qw(tv_interval gettimeofday);
use List::Util qw(min max);
use XML::Simple; # install it from cpan 
use Data::Dumper;

use File::Path qw(make_path remove_tree); # install it from cpan

###############################################################################
# Global variables
###############################################################################
# Messaging and this script name/location/version related.
# $PROGNAME is the basename of the running script; it is interpolated into the
# temporary directory path, the comment written into rewritten makefiles and
# the help text.
my $PROGNAME 	= basename($0);
my $PROGVER 	= "0.1";
# Configuration file name; the main block sets it from the -config argument.
my $config_file = "config.xml";
# NOTE(review): presumably the default name of the build analyzer report -
# confirm.
my $analyzes_file = "build_analyzer.xml";

my %config; 	# contains the input data loaded from the XML configuration
				# file (keys read in this file: prefix, varname, makefiles,
				# analyzes, errors)
my %analyzes;   # contains the output of the build analyzer, keyed by makefile:
				# information regarding duplicated and unused include paths
				# and so on
my @makefiles; 	# makefile names taken from the 'makefiles' entry of %config
my $tmp_file;   # scratch file that receives make output; set by
				# create_temp_directory()
#my %unused_inc_paths; # contents unused include paths for all makefiles
my %all_incs;   # makefile name => reference to the array of its normalized
				# include paths, filled by get_include_paths_of()

my @common_inc_paths; # read by clean_duplicates_phase2(); nothing in the
				# visible source assigns to it

###############################################################################
# Subroutine declarations
###############################################################################
# Forward declarations make every prototype known to the compiler before the
# first call site, whatever the order of the definitions further down.
sub show_help($);
sub create_temp_directory();
sub remove_temp_directory();
sub read_configs();
sub read_analyzes();
sub get_makefiles();
sub get_argument($);
sub get_actual_include_paths();
sub get_include_paths_of($$);
sub clean_unuseds();
sub clean_duplicates();
sub remove_inc_from_makefile($$$);
sub compilation_error($);
sub clean_unuseds_in_current($);
sub has_argument($);
sub compact();
sub devide_content($$);
sub remove_duplicated($);
# Helpers that had no forward declaration. normalize() in particular is called
# above its definition, which makes perl -w report
# "called too early to check prototype".
sub replace_in_makefile($$$);
sub duplicates_of($$$);
sub clean_duplicates_phase1($);
sub clean_duplicates_phase2($$);
sub normalize($);

###############################################################################
# Main Block 
###############################################################################
# -h / -help as the first argument prints the usage text. show_help() ends
# the process itself, so nothing after the call is executed.
if (exists $ARGV[0] and $ARGV[0] =~ /^\-h(elp)?$/)  {
	show_help(0);
}

# An explicit -config=FILE argument overrides the built-in default file name.
# Without the argument the default stays in place instead of being replaced
# by undef.
my $config_arg = get_argument('config');
if (defined $config_arg and $config_arg ne '') {
	$config_file = $config_arg;
}
read_configs();
read_analyzes();

# The configuration can flag compilation errors; nothing is cleaned then.
# No system call has failed at this point, so $! is not part of the message.
if (defined $config{'errors'}) {
	die("Error: There are compilation errors.");
}

create_temp_directory();
get_makefiles();
get_actual_include_paths();
# Exactly one cleaning mode runs; "dup" wins over "unused", which wins over
# "compact".
if(has_argument("dup")){
	clean_duplicates();
} elsif (has_argument("unused")) {
	clean_unuseds();
} elsif (has_argument('compact')) {
	#compact();
}
remove_temp_directory();

###############################################################################
# Subroutine definitions
###############################################################################

#=============================================================================
# Returns the value given on the command line for the option named $p, or
# undef when the option is not present.
# In:  option name without the leading dash (e.g. 'config')
# Out: the text following "-<name>" and one separator character inside the
#      same command line argument; the first matching argument wins
# The separator may be a space, '|', '=' or '"'.
sub get_argument($)
{
	my ($p) = @_;
	foreach my $arg (@ARGV) {
		# Each argument is inspected on its own, so the value can never run
		# into the arguments that follow it. \Q..\E makes the option name a
		# literal string rather than a pattern.
		if ($arg =~ /^-\Q$p\E[ |="](.*)/) {
			return $1;
		}
	}
	return undef;
}

#=============================================================================
# Tells whether a command line argument selects the given option.
# In:  option name without dashes (e.g. 'dup')
# Out: 1 when some argument starts with the name, optionally preceded by
#      dashes; 0 otherwise
# Only the beginning of each argument is examined, so the name appearing
# inside another option's value (for example in the path given to -config)
# does not count.
sub has_argument($)
{
	my ($p) = @_;
	foreach my $arg (@ARGV) {
		# \Q..\E: the name is a literal string, not a pattern.
		return 1 if $arg =~ /^-*\Q$p\E/;
	}
	return 0;
}

#=============================================================================
# Placeholder for the makefile compaction mode: it only prints the word
# "compact" followed by a newline.
sub compact()
{
	my $message = "compact\n";
	return print($message);
}

#=============================================================================
# Reads the build analyzer report (an XML file) into the global %analyzes.
# The report location is taken from the 'analyzes' entry of %config; when the
# configuration does not provide one, the script-wide default file name is
# used.
# %analyzes receives the content of the report's 'anon' element, which the
# rest of this file indexes by makefile name.
# Dies when the report cannot be parsed or holds no usable 'anon' element.
sub read_analyzes()
{
	my $report = $config{'analyzes'};
	if (! defined $report or ref($report) or $report eq '') {
		$report = $analyzes_file;
	}
	my $xml_config = XML::Simple->new();
	my $analyzes_ref = $xml_config->XMLin($report, ForceArray=>1);
	my $anon = $analyzes_ref->{'anon'};
	# Dereferencing anything but a hash would abort with an opaque message,
	# so the shape is checked first.
	if (ref($anon) ne 'HASH') {
		die "Error: no 'anon' entries found in analyzer report $report.\n";
	}
	%analyzes = %$anon;
}

#=============================================================================
# Runs "make clean" and then "make" in the directory of the given makefile and
# reports whether the build failed.
# In:  makefile path relative to $config{'prefix'}
# Out: 1 when the build is considered broken, 0 otherwise
# Dies when the path does not name a makefile or the build log is unreadable.
sub compilation_error($)
{
	my ($makefile) = @_;
	my $file = "$config{'prefix'}/$makefile";
	# Everything in front of the last "makefile"/"Makefile" is the directory
	# the build runs in.
	my ($dir) = $file =~ /(.*)[mM]akefile/;
	if (! defined $dir) {
		die "Wrong makefile name $file.";
	}

	# Both paths go through the shell: wrap them in single quotes and escape
	# embedded single quotes so blanks or metacharacters cannot split them.
	(my $quoted_dir = $dir) =~ s/'/'\\''/g;
	(my $quoted_log = $tmp_file) =~ s/'/'\\''/g;
	# "> file 2>&1" is the POSIX sh spelling; system() uses /bin/sh, where
	# the csh/bash shorthand ">& file" is not guaranteed to work.
	system("make -C '$quoted_dir' clean > /dev/null 2>&1");
	my $status = system("make -C '$quoted_dir' > '$quoted_log' 2>&1");

	open(my $log, '<', $tmp_file) or die "Cannot open $tmp_file, $!";
	my @content = readline($log);
	close($log);

	# A non-zero status means make itself reported failure (or could not be
	# started at all).
	return 1 if $status != 0;

	# The error summary is looked for on the line before the last one,
	# presumably because "make -C" ends its output with a "Leaving directory"
	# line. A one-line log is checked as a whole; an empty log has nothing to
	# check.
	if (@content) {
		my $summary = $content[$#content - 1];
		return 1 if $summary =~ /\s[Ee]rror/;
	}
	return 1 if join("", @content) =~ /No such file or directory/;
	return 0;
}


#=============================================================================
# Splits the text of a file into three pieces around a variable assignment.
# In:  file path, variable name
# Out: reference to a hash with the keys
#        first  - every line before the first "NAME =" / "NAME :=" line
#        second - the assignment line and the lines following it, up to but
#                 not including the next line made only of whitespace and '#'
#        third  - that terminating line and everything after it
#      All three keys are always present; a piece without lines is ''.
# A later assignment to the same variable switches back to 'second', so its
# lines are appended to that piece as well.
# Dies when the file cannot be opened.
sub devide_content($$)
{
	my ($file, $var_name) = @_;
	open(my $in, '<', $file) or die "Cannot open $file, $!";
	# Pre-seeding the pieces keeps callers from printing undefined values
	# when the variable sits on the first line or nothing follows its value.
	my %content = ('first' => '', 'second' => '', 'third' => '');
	my $part = "first";
	# A lexical line variable leaves the caller's $_ untouched.
	while (my $line = readline($in)) {
		# \Q..\E: the variable name is a literal string, not a pattern.
		if ($line =~ /^\Q$var_name\E\s*:?=/) {
			$part = "second";
		}
		if (($part eq "second") and ($line =~ /^[\s#]*$/)) {
			$part = "third";
		}
		$content{$part} .= $line;
	}
	close($in);
	return \%content;
}

#=============================================================================
# Rewrites a makefile so that the given variable holds exactly the given
# include paths.
# In:  makefile path relative to $config{'prefix'}, variable name, reference
#      to the array of paths to write
# The file is split with devide_content(); the text before and after the old
# value is written back unchanged and the value in between is replaced by a
# ":=" assignment with one path per continuation line. A marker comment
# naming this script is inserted before the assignment unless the text in
# front of it already contains that comment.
# Dies when the makefile cannot be read or written.
sub replace_in_makefile($$$)
{
	my ($makefile, $var_name, $accepted_incs) = @_;
	my $file = "$config{'prefix'}/$makefile";
	my $makefile_content = devide_content($file, $var_name);
	# A piece may be missing when the variable is on the first line or when
	# nothing follows its value; treat it as empty text.
	my $head = $makefile_content->{'first'};
	$head = '' if ! defined $head;
	my $tail = $makefile_content->{'third'};
	$tail = '' if ! defined $tail;
	my $comment = "# This variable was generated by $PROGNAME v$PROGVER\n";

	open(my $out, '>', $file) or die "Cannot open $file, $!";
	print $out $head;
	# Plain substring search: the script name and version may contain
	# characters that are special in a pattern.
	if (index($head, $comment) < 0) {
		print $out $comment;
	}
	print $out "$var_name := \\\n\t", join("\\\n\t", @$accepted_incs);
	print $out "\\\n";
	print $out $tail;
	# Buffered write errors only surface when the handle is closed.
	close($out) or die "Cannot write $file, $!";
}

#=============================================================================
# Rewrites a makefile so that the given variable no longer lists the given
# include paths.
# In:  makefile path relative to $config{'prefix'}, variable name, reference
#      to the array of paths to leave out
# The paths to keep are taken from %all_incs (the list collected for this
# makefile) in their recorded order, and handed to replace_in_makefile().
# Dies when no include paths were collected for the makefile: rewriting it
# from an empty list would wipe the variable.
sub remove_inc_from_makefile($$$)
{
	my ($makefile, $var_name, $unused_incs) = @_;
	my $all = $all_incs{$makefile};
	if (ref($all) ne 'ARRAY') {
		die "No include paths were collected for $makefile, ".
			"refusing to rewrite it.\n";
	}
	# One hash lookup per path instead of scanning the removal list each time.
	my %is_unused = map { $_ => 1 } @{ $unused_incs || [] };
	my @accepted_incs = grep { ! $is_unused{$_} } @$all;
	replace_in_makefile($makefile, $var_name, \@accepted_incs);
}

#=============================================================================
# Returns a reference to a new array holding the elements of the given array
# without repetitions.
# In:  reference to an array of strings
# Out: reference to a new array; the first occurrence of every value is kept
#      and the original order is preserved (the order of include paths is
#      significant to the compiler)
sub remove_duplicated($)
{
	my ($all) = @_;
	my %seen;
	my @r = grep { ! $seen{$_}++ } @$all;
	return \@r;
}

#=============================================================================
# Looks up the partners of an include path in a pair list stored as two
# parallel arrays.
# In:  include path, reference to the first column, reference to the second
#      column
# Out: the distinct second-column values found at the positions where the
#      first column equals the path, in no particular order
sub duplicates_of($$$)
{
	my ($inc, $d1, $d2) = @_;
	my %partners;
	my $count = scalar(@$d1);
	foreach my $idx (0 .. $count - 1) {
		next unless $inc eq $d1->[$idx];
		$partners{$d2->[$idx]} = 1;
	}
	return keys %partners;
}

#=============================================================================
# Second phase of duplicate cleaning: rewrites the include path variable of a
# makefile without the paths covered by @common_inc_paths and without one
# occurrence of every path listed in $duplicates.
# In:  makefile name, reference to an array of duplicated include paths
# NOTE: the array behind $duplicates is modified - every entry that gets
# matched is removed from it.
# NOTE(review): the only call visible in this file is commented out.
sub clean_duplicates_phase2($$)
{
	my ($makefile, $duplicates) = @_;
	my $all = $all_incs{$makefile};
	my $d1 = $analyzes{$makefile}->{'duplicates_1'};
	my $d2 = $analyzes{$makefile}->{'duplicates_2'};
	# The paths are examined from the last one to the first one, so when a
	# path listed in $duplicates occurs several times it is the last
	# occurrence that gets dropped.
	my @r_all = reverse(@$all); 
	my @r_accepted;
INC:foreach my $i (@r_all) {
		# Drop the path when it equals a common include path, or when one of
		# its recorded duplicate partners equals a common include path.
		foreach my $a (@common_inc_paths) {
			if ($i eq normalize($a)) {
				next INC;
			} else {
				foreach my $b (duplicates_of($i, $d1, $d2)) {
					if (normalize($a) eq $b) {
						next INC;
					}
				}
			}
		}

		# Drop the path when it is still listed in $duplicates; the matching
		# entry is consumed so that the next occurrence of the same path is
		# kept.
		for (my $j = 0; $j < scalar(@$duplicates); ++$j) {
			if ($i eq $duplicates->[$j]) {
				splice(@$duplicates, $j, 1);
				next INC;
			}
		}

		push (@r_accepted, $i);
	}

	# Restore the original order of the kept paths.
	my @accepted = reverse(@r_accepted);
	# NOTE(review): the count subtracts the number of common paths whether or
	# not each of them was actually dropped above - confirm it is meant to be
	# the number of removed duplicates.
	my $z = scalar(@$all) - scalar(@common_inc_paths) - scalar(@accepted);
	print "  removed $z duplicated include paths.\n";
	replace_in_makefile($makefile, $config{'varname'}, \@accepted);
}

#=============================================================================
# Removes from one makefile the include paths the analyzer reported as unused,
# as long as the project still builds without them.
# In:  makefile name (key of %analyzes)
# Every reported path is added to the removal list, the makefile is rewritten
# and the project is rebuilt; a path whose removal breaks the build is taken
# off the list again. The makefile is finally rewritten with the paths that
# stayed on the list, and their number is printed.
sub clean_unuseds_in_current($)
{
	my ($makefile) = @_;
	my $candidates = $analyzes{$makefile}->{'unuseds'};
	my @confirmed;
	for my $path (@$candidates) {
		push @confirmed, $path;
		remove_inc_from_makefile($makefile, $config{'varname'}, \@confirmed);
		# The newest candidate is always the last element.
		pop @confirmed if compilation_error($makefile);
	}
	print "  removed ", scalar(@confirmed), " unused include paths\n";
	remove_inc_from_makefile($makefile, $config{'varname'}, \@confirmed);
}

#=============================================================================
# First phase of duplicate cleaning: picks one path out of every group of
# include paths the analyzer reported as duplicates of each other, and
# rewrites the variable of the makefile with the picked paths.
# In:  makefile name (key of %analyzes)
# The report is a list of pairs stored in the parallel arrays 'duplicates_1'
# and 'duplicates_2'. If A is a duplicate of B then B is a duplicate of A as
# well, but only one of the two is needed:
#   pass 1 keeps every path that is paired with itself;
#   pass 2 keeps the first path of a remaining pair when neither side of the
#          pair has been kept so far.
# NOTE(review): the variable is rewritten with the picked paths only, so any
# include path of the makefile that does not occur in 'duplicates_1' is not
# written back - confirm that the analyzer lists every path (for example as
# a pair with itself).
# Dies, before the makefile is touched, when no include paths were collected
# for it.
sub clean_duplicates_phase1($)
{
	my ($makefile) = @_;
	my $all = $all_incs{$makefile};
	if (ref($all) ne 'ARRAY') {
		die "No include paths were collected for $makefile.\n";
	}
	my @d1 = @{$analyzes{$makefile}->{'duplicates_1'}};
	my @d2 = @{$analyzes{$makefile}->{'duplicates_2'}};
	my @r;
	my %kept;		# the paths already stored in @r
	my @pending;	# positions of the pairs whose two sides differ

	# Pass 1. The pair list is not modified while it is being walked, so no
	# pair is skipped.
	foreach my $i (0 .. $#d1) {
		if ($d1[$i] eq $d2[$i]) {
			push(@r, $d1[$i]) unless $kept{$d1[$i]}++;
		} else {
			push(@pending, $i);
		}
	}
	# Pass 2.
	foreach my $i (@pending) {
		next if $kept{$d1[$i]} or $kept{$d2[$i]};
		push(@r, $d1[$i]);
		$kept{$d1[$i]} = 1;
	}
	replace_in_makefile($makefile, $config{'varname'}, \@r);
	my $z = scalar(@$all) - scalar(@r);
	print "  removed $z duplicated include paths.\n";
}

#=============================================================================
# Cleans duplicated include paths in every makefile named in the build
# analyzer report.
# A makefile is first built as it is; when that build already fails an error
# line is printed and the makefile is left alone. Otherwise, provided the
# report has a 'duplicates_1' entry for it, the makefile goes through
# clean_duplicates_phase1().
sub clean_duplicates()
{
	for my $makefile (keys %analyzes) {
		if (compilation_error($makefile)) {
			print " ERROR: compile errors without removing any ".
					"duplicated path in: $makefile.\n";
			next;
		}
		next unless defined $analyzes{$makefile}->{'duplicates_1'};
		clean_duplicates_phase1($makefile);
		#clean_duplicates_phase2($makefile, \@optimized_duplicates);
	}
}

#=============================================================================
# Cleans unused include paths in every makefile named in the build analyzer
# report.
# A makefile is first built as it is; when that build already fails an error
# message is printed and the makefile is left alone, otherwise it is handed
# to clean_unuseds_in_current().
sub clean_unuseds()
{
	for my $makefile (keys %analyzes) {
		unless (compilation_error($makefile)) {
			clean_unuseds_in_current($makefile);
			next;
		}
		print " ERROR: compile errors without removing any unused path \n".
			" makefile: $makefile.\n";
	}
}


#=============================================================================
# Makes sure a path ends with a slash.
# In:  path
# Out: the path itself when it already ends with '/', otherwise the path with
#      a '/' appended
sub normalize($)
{
	my ($path) = @_;
	return $path if $path =~ m{/$};
	return $path . "/";
}

#=============================================================================
# Collects the include paths a makefile assigns to the given variable and
# stores them in %all_incs under the makefile name.
# In:  makefile path relative to $config{'prefix'}, variable name
# The makefile is not modified: "make -pn" dumps its data base into the
# temporary file and the assignments of the variable are read from there.
# Every word of a value becomes one entry; a word of the form
# -I/path/{sub1,sub2} is expanded to -I/path/sub1 and -I/path/sub2. All
# entries are passed through normalize(). Repeated paths are kept.
# Dies when the dump cannot be read.
sub get_include_paths_of($$)
{
	my ($makefile, $var_name) = @_;
	my $file = "$config{'prefix'}/$makefile";
	# Both paths go through the shell: wrap them in single quotes and escape
	# embedded single quotes.
	(my $quoted_file = $file) =~ s/'/'\\''/g;
	(my $quoted_tmp = $tmp_file) =~ s/'/'\\''/g;
	system("make -pnf '$quoted_file' > '$quoted_tmp'");
	open(my $dump, '<', $tmp_file) or die "Cannot open $tmp_file, $!";
	my @inc_array;
	# A lexical line variable leaves the caller's $_ untouched.
	while (my $line = readline($dump)) {
		# Only an assignment to exactly this variable counts: the name is
		# matched literally and must start the line, so variables whose name
		# merely contains it are ignored.
		next unless $line =~ /^\Q$var_name\E\s*(?:::?|\+|\?)?=(.*)$/;
		# The capture never holds the line terminator and split(" ") ignores
		# surrounding blanks, so nothing has to be cut off the end.
		my $value = $1;
		foreach my $word (split(" ", $value)) {
			if ($word =~ /(-I.*)\{(.*)\}$/) {
				my ($base, $alternatives) = ($1, $2);
				foreach my $sub (split(",", $alternatives)) {
					push(@inc_array, normalize("$base$sub"));
				}
			} else {
				push(@inc_array, normalize($word));
			}
		}
	}
#	$all_incs{$makefile} = remove_duplicated(\@inc_array);
	$all_incs{$makefile} = \@inc_array;
	close($dump);
}

#=============================================================================
# Collects the current include paths of every makefile in @makefiles by
# calling get_include_paths_of() with the variable name from the
# configuration.
sub get_actual_include_paths()
{
	# A named loop variable is used on purpose: with the implicit $_ the
	# elements of @makefiles are aliased to a global that the callee
	# overwrites while it reads its input.
	foreach my $makefile (@makefiles) {
		next if ! defined $makefile;
		get_include_paths_of($makefile, $config{'varname'});
	}
}

#=============================================================================
# Loads the XML file named by $config_file and copies its top-level entries
# into the global %config.
sub read_configs()
{
	my $parsed = XML::Simple->new()->XMLin($config_file);
	%config = %{$parsed};
}

#=============================================================================
# Deletes the temporary directory /tmp/<script name> with everything in it.
# Dies when remove_tree() reports that nothing was removed.
sub remove_temp_directory()
{
	unless (remove_tree("/tmp/$PROGNAME")) {
		die "Cannot remove temprorary directory.\n";
	}
}

#=============================================================================
# Makes sure the temporary directory /tmp/<script name> exists and points
# $tmp_file at the scratch file inside it.
# Dies when the directory is missing and cannot be created.
sub create_temp_directory()
{
	unless (-e "/tmp/$PROGNAME") {
		die "Cannot create temprorary directory.\n"
			unless make_path("/tmp/$PROGNAME");
	}
	$tmp_file = "/tmp/$PROGNAME/__tmp_";
}

#=============================================================================
# Reads the makefile list from the 'makefiles' entry of %config and appends
# the usable names to the global array @makefiles.
# The entry may be a single name or a reference to an array of names; a
# missing entry or one of any other shape contributes nothing. Blank names
# are skipped silently. A name is accepted when it contains a directory
# part followed by "Makefile" or "makefile"; any other name is reported on
# standard output and left out.
sub get_makefiles()
{
	my $listed = $config{'makefiles'};
	my @candidates;
	if (ref($listed) eq 'ARRAY') {
		@candidates = @$listed;
	} elsif (defined $listed and ! ref($listed)) {
		# A single name goes through the same checks as a list of names.
		@candidates = ($listed);
	}
	foreach my $candidate (@candidates) {
		next if (! defined $candidate or ref($candidate));
		next if ($candidate =~ m/^(\s)*$/);
		chomp($candidate);
		if ($candidate =~ /.*\/[Mm]akefile/) {
			push(@makefiles, $candidate);
		} else {
			print "$candidate - is not an proper makefile\n";
		}
	}
}

#===============================================================================
# Show help message and exit with given exit code (0 is assumed success status)
# In: exit code
# The text lists the options the main block actually looks for. This function
# never returns to its caller.
sub show_help($)
{
    my $status = shift;

print qq!$PROGNAME                     LTXC                     $PROGNAME

NAME
  $PROGNAME - LTXC - Gets unused include paths xml file, which is written by 
build_analizer, tries to remove each include path, if after compilation there 
are errors returns removed include path to its place.

SYNOPSIS
  $PROGNAME -h[elp]
  $PROGNAME -config=FILE {-dup | -unused | -compact}

DESCRIPTION

OPTIONS
  -h[elp]            Print this help screen and exit (first argument only)
  -config=FILE       XML configuration file to read
  -dup               Clean duplicated include paths
  -unused            Clean unused include paths
  -compact           Reserved; currently performs no action
    
ENVIRONMENT

RESTRICTIONS
  Tested with GNU C/C++ compilers only.

SEE ALSO
  gcc(1) g++(1)

AUTHOR
  Tigran Hovhannisyan (tigran_hovhannisyan\@ltxc.com)

REVISION HISTORY
!;
    exit($status);
} # show_help()
################################################################################
#                          E N D  O F  S C R I P T
################################################################################
